#!/usr/bin/env python
# SPDX-License-Identifier: MIT
# Copyright (C) 2021 Even Rouault

""" This script merges the GRIB2 tables from DEGRIB, which are in the
    frmts/grib/degrib/data directory, with the ones from WMO.
"""

import csv
import os
import re
import requests

# Coordinates of the WMO GRIB2 tables repository and the release to import
wmo_github_repo = 'wmo-im/GRIB2'
wmo_github_tag = 'v28.1'
wmo_base_url = f'https://raw.githubusercontent.com/{wmo_github_repo}/{wmo_github_tag}'

script_dir_name = os.path.dirname(os.path.realpath(__file__))

# Input directory with degrib tables
degrib_table_input_dir = os.path.join(script_dir_name, 'data')
assert os.path.exists(degrib_table_input_dir)

# Output directory of merged tables (${repository_root}/data)
output_dir = os.path.join(os.path.dirname(
    os.path.dirname(os.path.dirname(script_dir_name))), 'data')
assert os.path.exists(output_dir)

# Record which version of each component the merged tables are built from
with open(os.path.join(output_dir, "grib2_table_versions.csv"), "wt") as f:
    f.write("component,version\n")
    f.write(f"wmo,{wmo_github_tag}\n")
    f.write("degrib,2.25\n")

# Resolve the commit the tables are downloaded from. The tables are fetched at
# wmo_github_tag, so the commit quoted in the generated files must be the one
# behind that tag, not whatever HEAD of the repository currently is.
r = requests.get(
    f"https://api.github.com/repos/{wmo_github_repo}/commits/{wmo_github_tag}",
    headers={"Accept": "application/vnd.github.VERSION.sha"},
    timeout=60)
# Fail here rather than embedding an error body (rate limit, 404, ...) in the
# header of every generated file.
r.raise_for_status()
wmo_table_sha1 = r.content.decode('utf-8').strip()

# Banner written as pseudo-records (negative codes) at the top of each
# generated table
warning_msg = 'DO NOT MODIFY THIS FILE. It is generated by frmts/grib/degrib/merge_degrib_and_wmo_tables.py'
warning2_msg = f'from tables at version https://github.com/{wmo_github_repo}/commit/{wmo_table_sha1}'
warning_len = max(len(warning_msg), len(warning2_msg))

# Processes table 4.5 "Fixed surface types and units"


def process_table_4_5():
    """Merge the degrib and WMO versions of GRIB2 code table 4.5.

    The degrib table is read from ``grib2_table_4_5.csv`` in
    ``degrib_table_input_dir``, the WMO table is downloaded from
    ``wmo_base_url``, and the merged table is written to
    ``grib2_table_4_5.csv`` in ``output_dir``, preceded by four banner rows
    with negative codes.

    degrib records have precedence since they contain ``short_name``. A WMO
    record is only used for a code that degrib does not define, or that
    degrib marks as reserved while WMO does not.

    Raises:
        requests.RequestException: if the WMO table cannot be downloaded.
        AssertionError: if a table does not have the expected columns or
            defines the same code twice.
    """
    fieldnames = ['code', 'short_name', 'name', 'unit']

    # Ingest table from degrib. The file is closed as soon as it is parsed.
    records = [None] * 256
    degrib_filename = os.path.join(
        degrib_table_input_dir, 'grib2_table_4_5.csv')
    with open(degrib_filename, newline='', encoding='utf-8') as degrib_file:
        degrib_csv_reader = csv.DictReader(degrib_file)
        assert degrib_csv_reader.fieldnames == fieldnames
        for row in degrib_csv_reader:
            code = int(row['code'])
            assert records[code] is None
            row['code'] = code
            records[code] = row

    # Ingest table from WMO
    r = requests.get(
        wmo_base_url + '/GRIB2_CodeFlag_4_5_CodeTable_en.csv', timeout=60)
    # Do not try to parse an HTTP error page as CSV
    r.raise_for_status()
    lines = r.content.decode('utf-8').split('\n')
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == ['Title_en', 'SubTitle_en', 'CodeFlag',
                                         'Value', 'MeaningParameterDescription_en',
                                         'Note_en', 'UnitComments_en', 'Status']
    wmo_records = [None] * 256
    for row in wmo_csv_reader:
        code_flag = row['CodeFlag']
        if '-' in code_flag:
            # Deal with ranges (reserved codes): the row applies to every
            # code of the range
            min_val, max_val = (int(val) for val in code_flag.split('-'))
            assert min_val < max_val
        else:
            min_val = max_val = int(code_flag)
        for code in range(min_val, max_val + 1):
            assert wmo_records[code] is None
            wmo_records[code] = {'code': code,
                                 'name': row['MeaningParameterDescription_en'],
                                 'unit': row['UnitComments_en']}

    # Merge records from WMO into degrib
    # degrib ones have the precedence when they exist since they contain
    # short_name.
    for code, wmo_record in enumerate(wmo_records):
        if wmo_record is None:
            continue
        degrib_record = records[code]
        if degrib_record is None or (
                'Reserved' in degrib_record['name']
                and 'Reserved' not in wmo_record['name']):
            unit = wmo_record['unit']
            # An empty WMO unit is written as '-'
            unit = '-' if unit == '' else unit
            records[code] = {'code': code,
                             'short_name': '',
                             'name': wmo_record['name'],
                             'unit': unit}

    # Write the final file. The WMO text was decoded as UTF-8, so write it
    # back as UTF-8 whatever the locale of the machine running the script.
    output_filename = os.path.join(output_dir, 'grib2_table_4_5.csv')
    with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        banner_rule = '#' * warning_len
        banner = (banner_rule, warning_msg, warning2_msg, banner_rule)
        for code, text in zip(range(-4, 0), banner):
            writer.writerow({'code': code,
                             'short_name': text, 'name': '#', 'unit': '#'})
        for row in records:
            # A code defined by neither table has no record to write
            if row is not None:
                writer.writerow(row)


# Generate grib2_table_4_5.csv in the output directory when the script runs
process_table_4_5()


# Process table referenced by 4.2 with "(Code table 4.X)" in the UnitComments_en
# column
def get_auxiliary_table(num):
    """Download WMO code table 4.<num> and flatten it into a single string.

    Args:
        num: number of the code table (the X of "Code table 4.X"). It is
            inserted as-is into the name of the file to download.

    Returns:
        The "CodeFlag=MeaningParameterDescription_en" pair of every row of
        the table, in file order, joined with "; ".

    Raises:
        requests.RequestException: if the table cannot be downloaded.
        AssertionError: if the table does not have the expected columns.
    """
    url = wmo_base_url + f'/GRIB2_CodeFlag_4_{num}_CodeTable_en.csv'
    print(f'Fetching {url}...')
    r = requests.get(url, timeout=60)
    # Do not try to parse an HTTP error page as CSV
    r.raise_for_status()
    lines = r.content.decode('utf-8').split('\n')
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == ['Title_en', 'SubTitle_en', 'CodeFlag',
                                         'Value', 'MeaningParameterDescription_en',
                                         'Note_en', 'UnitComments_en', 'Status']
    return '; '.join(
        row['CodeFlag'] + '=' + row['MeaningParameterDescription_en']
        for row in wmo_csv_reader)

# Processes table 4.2 "Parameter Number by Product discipline and Parameter category"


def process_table_4_2():
    """Merge the degrib and WMO versions of the GRIB2 code table 4.2 subtables.

    The WMO table, which holds all product disciplines and parameter
    categories, is downloaded from ``wmo_base_url`` and split into one
    subtable per (product discipline, parameter category). Each subtable is
    merged with the degrib file of the same name in
    ``degrib_table_input_dir`` when that file exists, then written as
    ``grib2_table_4_2_<discipline>_<category>.csv`` in ``output_dir``,
    preceded by four banner rows with negative subcategories.

    degrib entries have precedence, unless they are reserved and the WMO
    entry is not. When the WMO unit refers to another code table
    ("(Code table 4.X)"), the flattened content of that table replaces the
    unit, including on entries coming from degrib.

    Raises:
        requests.RequestException: if a WMO table cannot be downloaded.
        ValueError: if the product discipline and parameter category cannot
            be extracted from the subtitle of a WMO row.
        AssertionError: if a table does not have the expected columns or
            defines the same code twice.
    """
    # Fetch the WMO table, which contains the full table for all product
    # disciplines and parameter category
    url = wmo_base_url + '/GRIB2_CodeFlag_4_2_CodeTable_en.csv'
    print(f'Fetching {url}...')
    r = requests.get(url, timeout=60)
    # Do not try to parse an HTTP error page as CSV
    r.raise_for_status()
    lines = r.content.decode('utf-8').split('\n')
    wmo_csv_reader = csv.DictReader(lines)
    assert wmo_csv_reader.fieldnames == ['Title_en', 'SubTitle_en', 'CodeFlag',
                                         'Value', 'MeaningParameterDescription_en',
                                         'Note_en', 'UnitComments_en', 'Status']

    # dict_tables has as key a subtable (by product discipline and category)
    # and as value a table of 256 entries with the subcategory as index
    dict_tables = {}

    # Flattened auxiliary code tables, indexed by table number, so that each
    # of them is downloaded only once
    auxiliary_tables = {}

    subtitle_re = re.compile(r".+?([0-9]+).+?([0-9]+).+")
    code_table_re = re.compile(r"\(Code table 4\.([0-9]+)\)")

    for row in wmo_csv_reader:

        # Split values
        # "Product discipline 0 - Meteorological products, parameter category 1: moisture"
        # as product_type = 0 and category = 1
        subtitle_match = subtitle_re.match(row['SubTitle_en'])
        if subtitle_match is None:
            raise ValueError(
                'Cannot extract product discipline and parameter category '
                f'from {row!r}')
        product_type = int(subtitle_match.group(1))
        category = int(subtitle_match.group(2))

        # Instantiate final table
        table_name = f'grib2_table_4_2_{product_type}_{category}.csv'
        if table_name not in dict_tables:
            dict_tables[table_name] = [None] * 256
        table = dict_tables[table_name]

        code_flag = row['CodeFlag']

        # See https://github.com/wmo-im/GRIB2/issues/129
        if product_type == 0 and category == 1 and code_flag == '149-191':
            code_flag = '150-191'  # patching as there is a record 149

        if '-' in code_flag:
            # Range of codes: the row applies to every code of the range
            min_val, max_val = (int(val) for val in code_flag.split('-'))
            assert min_val < max_val
        else:
            min_val = max_val = int(code_flag)

        name = row['MeaningParameterDescription_en']
        unit = row['UnitComments_en']

        # A unit referring to another code table is replaced by the content
        # of that table, and flagged so that it also overrides degrib's unit
        code_table_match = code_table_re.match(unit)
        override_unit = code_table_match is not None
        if override_unit:
            auxiliary_table_num = code_table_match.group(1)
            if auxiliary_table_num not in auxiliary_tables:
                auxiliary_tables[auxiliary_table_num] = get_auxiliary_table(
                    auxiliary_table_num)
            unit = auxiliary_tables[auxiliary_table_num]

        for code in range(min_val, max_val + 1):
            assert table[code] is None, (table_name, code, table[code], row)
            table[code] = {'subcat': code,
                           'short_name': '',
                           'name': name,
                           'unit': unit,
                           'unit_conv': 'UC_NONE',
                           'override_unit': override_unit}

    fieldnames = ['subcat', 'short_name', 'name', 'unit', 'unit_conv']

    # Iterate over all sub tables.
    for filename, wmo_table in dict_tables.items():
        degrib_filename = os.path.join(degrib_table_input_dir, filename)

        # Ingest corresponding degrib subtable, if it exists
        if os.path.exists(degrib_filename):
            records = [None] * 256
            # The file is closed as soon as it is parsed
            with open(degrib_filename, newline='',
                      encoding='utf-8') as degrib_file:
                degrib_csv_reader = csv.DictReader(degrib_file)
                assert degrib_csv_reader.fieldnames == fieldnames
                for row in degrib_csv_reader:
                    code = int(row['subcat'])
                    assert records[code] is None
                    row['subcat'] = code
                    records[code] = row

            # Merge WMO and degrib subtable
            # degrib entries have precedence, unless there are reserved ranges
            # and WMO entry is not reserved
            for code, wmo_record in enumerate(wmo_table):
                if wmo_record is None:
                    continue
                degrib_record = records[code]
                if degrib_record is None:
                    records[code] = wmo_record
                elif ('Reserved' in degrib_record['name']
                      and 'Reserved' not in wmo_record['name']):
                    records[code] = wmo_record
                if wmo_record['override_unit']:
                    records[code]['unit'] = wmo_record['unit']

        else:
            records = wmo_table

        # Write final subtable. The WMO text was decoded as UTF-8, so write
        # it back as UTF-8 whatever the locale of the machine running the
        # script.
        with open(os.path.join(output_dir, filename), 'w', newline='',
                  encoding='utf-8') as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_NONNUMERIC)
            writer.writeheader()
            banner_rule = '#' * warning_len
            banner = (banner_rule, warning_msg, warning2_msg, banner_rule)
            for subcat, text in zip(range(-4, 0), banner):
                writer.writerow({'subcat': subcat,
                                 'short_name': text, 'name': '#', 'unit': '#',
                                 'unit_conv': '#'})
            for row in records:
                if row:
                    # Only keep the columns of the output file: WMO records
                    # also carry the internal 'override_unit' flag
                    writer.writerow({x: row[x] for x in fieldnames})


# Generate the grib2_table_4_2_*.csv subtables in the output directory when
# the script runs
process_table_4_2()
